This page shows maps of the COVID-19 case rate per 100,000 people by state since the start of the epidemic.
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✓ ggplot2 3.3.5 ✓ purrr 0.3.4
## ✓ tibble 3.1.5 ✓ dplyr 1.0.7
## ✓ tidyr 1.1.4 ✓ stringr 1.4.0
## ✓ readr 2.0.2 ✓ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(naniar)
library(plotly)
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
# Default figure options for all chunks: width 8, aspect ratio 0.8, and
# figures displayed at 90% of the output width.
knitr::opts_chunk$set(fig.width = 8, fig.asp = 0.8, out.width = "90%")
First, let’s read in the CDC COVID-19 case data by state, which covers the whole period up to Thu Nov 18 2021.
[The data generated: Thu Nov 18 2021 22:02:06 GMT-0500 (EST)]
# Load the CDC "cases, deaths and testing by state" table. The first two
# lines of the file are skipped, and the column names are normalised by
# janitor::clean_names().
covid_total_by_state =
  janitor::clean_names(
    read_csv(
      file = "./data/united_states_covid19_cases_deaths_and_testing_by_state.csv",
      skip = 2
    )
  )
## Rows: 62 Columns: 25
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (25): State/Territory, Level of Community Transmission, Total Cases, Con...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
The original dataset marks missing values with “N/A”, so we replace them with NA and convert the case-rate variable to numeric.
# String the raw table uses to mark a missing value
na_strings = "N/A"

# Turn every "N/A" into a real NA, keep only the state name (as `region`) and
# the case rate, then fix the types: `region` becomes a factor with the
# footnoted "New York*" label mapped to "New York", and the case rate becomes
# numeric.
covid_total_by_state_tidy =
  covid_total_by_state %>%
  replace_with_na_all(condition = ~ .x %in% na_strings) %>%
  select(region = state_territory, case_rate_per_100000) %>%
  mutate(
    region = recode(as.factor(region), "New York*" = "New York"),
    case_rate_per_100000 = as.numeric(case_rate_per_100000)
  )
Let’s make a choropleth map to show the COVID-19 case rate per 100,000 by state in the US.
# Lookup table of states and their postal abbreviations/codes, with column
# names normalised by janitor::clean_names().
postal_code =
  janitor::clean_names(
    read_csv(file = "./data/us_postal_code.csv")
  )
## Rows: 51 Columns: 3
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (3): State, Abbrev, Code
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Attach each state's case rate to the postal-code lookup table (matching
# `state` against `region`) and move the `code` column to the front.
plot_df =
  postal_code %>%
  left_join(covid_total_by_state_tidy, by = c("state" = "region")) %>%
  relocate(code)
2. Draw the map with plotly.
# Geo layout options passed to plotly: restrict the map to the USA, use the
# Albers USA projection, and draw lakes in white.
g = list(
  scope = "usa",
  projection = list(type = "albers usa"),
  showlakes = TRUE,
  lakecolor = toRGB("white")
)
# Hover label for each state: the state name followed by an HTML line break.
plot_df[["hover"]] = paste(plot_df[["state"]], "<br>")
# Choropleth of the case rate: states are located through the `code` column
# (USA-states location mode) and shaded by cases per 100,000.
map_plotly1 =
  plot_df %>%
  plot_geo(locationmode = "USA-states") %>%
  add_trace(
    locations = ~code,
    z = ~case_rate_per_100000,
    color = ~case_rate_per_100000,
    colors = "Purples",
    text = ~hover
  ) %>%
  colorbar(title = "Case Rate Per 100K") %>%
  layout(
    geo = g,
    title = "COVID-19 Case Rate by State/Territory (cases per 100,000)"
  )
The following data frame holds the COVID-19 case rate per 100,000 up to Dec 31, 2020.
# Read one state-level CSV file.
#
# file_df: path to the CSV file; its first two lines are skipped before
#   parsing.
# Returns the data frame produced by read_csv().
read_data_function = function(file_df) {
  read_csv(file_df, skip = 2)
}
# Build one row per file found in ./data/state_covid_separate and read each
# file into the nested `observations` list-column.
state_2020 =
  tibble(
    files = list.files("./data/state_covid_separate")
  ) %>%
  mutate(
    # file.path() is vectorised, so `path` is a plain character column rather
    # than a list of length-one strings built element by element with map().
    path = file.path("./data/state_covid_separate", files),
    # one parsed data frame per file
    observations = map(path, read_data_function)
  )
Let’s unnest the data frame and keep only the rows for Dec 31, 2020.
# Flatten the per-file data frames into one table, normalise the column
# names, keep only the rows dated "Dec 31 2020", and retain the state name
# together with its total cases and case rate per 100k.
state_2020_tidy =
  state_2020 %>%
  unnest(cols = observations) %>%
  janitor::clean_names() %>%
  filter(.data$date == "Dec 31 2020") %>%
  select(all_of(c("state", "total_cases", "total_case_rate_per_100k")))
Make the map plot.
# Add the postal-code columns to the Dec 31 2020 snapshot, matching on `state`.
plot_2020_df =
  state_2020_tidy %>%
  left_join(postal_code, by = "state")
# Geo layout options passed to plotly: restrict the map to the USA, use the
# Albers USA projection, and draw lakes in white.
g = list(
  scope = "usa",
  projection = list(type = "albers usa"),
  showlakes = TRUE,
  lakecolor = toRGB("white")
)
# Hover label for each state: the state name, a line break, then the total
# case count followed by another line break.
plot_2020_df[["hover"]] = paste(
  plot_2020_df[["state"]], "<br>",
  "Total case", plot_2020_df[["total_cases"]], "<br>"
)
# Choropleth of the Dec 31 2020 case rate: states are located through the
# `code` column (USA-states location mode) and shaded by cases per 100k.
map_plotly2 =
  plot_2020_df %>%
  plot_geo(locationmode = "USA-states") %>%
  add_trace(
    locations = ~code,
    z = ~total_case_rate_per_100k,
    color = ~total_case_rate_per_100k,
    colors = "Purples",
    text = ~hover
  ) %>%
  colorbar(title = "Case Rate Per 100K") %>%
  layout(
    geo = g,
    title = "2020 COVID-19 Case Rate by State/Territory (cases per 100,000)"
  )